# Ryan Copus, Ryan Hübert and Paige Pellaton
# "Trading Diversity? Judicial Diversity and Case Outcomes in Federal Courts"
# American Political Science Review

# File name: chp_apsr_06_plots.R
# Last revision date: May 30, 2024
# Questions or comments? Contact Ryan Hübert: https://ryanhubert.com/

# What does this script do?
# This script creates the plots and the tables for the paper and appendix.

# Last pre-print execution of this code: 
# > Date: May 1, 2024 
# > Machine: MacBook Pro 14" (2021 model) with Apple M1 Max chip and 64 GB RAM
# > OS: macOS Sonoma 14.4
# > R: version 4.3.2

################################################################################
# Before you run this script...
################################################################################

# Clear the workspace
rm(list = ls())

# This toggle allows you to not save the various outputs (plots and tables) 
# so that you can do some debugging. Simply switch it to FALSE to do so.
to.save <- TRUE

################################################################################
# Load packages and set options
################################################################################

require(tidyverse)
require(poolr) # for estimating number of tests
require(scales) # for making numbers with commas
require(cowplot) # for making grids for plots

# Turn off messages from summarize function
options(dplyr.summarise.inform = FALSE)

################################################################################
# Directory management
################################################################################

# Define and set the working directory.
wdir <- gsub("/[Cc]ode/?","",getwd())
setwd(wdir)

# Which directory has the results in it
# Note: this uses the LAST set of analyses run -- you need to manually change it
#       if you wish to generate plots for a different set of analyses
adir <- paste0(wdir, "/outs")
adir <- paste0(adir, "/", max(list.files(adir)[which(grepl("^2[0-9]+$",list.files(adir)))]))

# Which directory has the randomization data in it
rdir <- paste0(adir, "/randomization")

# Which directory has the effects in it
edir <- paste0(adir, "/rows")

# Which directory has the manuscript in it?
mdir <- paste0(wdir, "/ms")

# Create a directory that will contain the plots
pdir <- paste0(mdir, "/_img")
if(!dir.exists(pdir)){
  dir.create(pdir)
}

# Create a directory that will contain the tables
tdir <- paste0(mdir, "/_tab")
if(!dir.exists(tdir)){
  dir.create(tdir)
}

################################################################################
# Some useful plot and table tools
################################################################################

# A function for converting points to mm
unitpt <- function(x) {0.352778 * x} 

# The color we use for dot plots
est.color <- 'gray30'

# Make nice names for the treatments
tmap <- list("nontraditional" = "Nontraditional\nAppointees", 
             "nonwhite" = "Judges of Color", 
             "black" = "Black Judges", 
             "latino" = "Latino Judges",
             "woman" = "Women", 
             "white_woman" = "White Women", 
             "nonwhite_woman" = "Women of Color", 
             "nonwhite_man" = "Men of Color", 
             "republican" = "Republican Appointees")

# Make nice names for the outcomes
omap <- list("settlement" = "Settlements", 
             "dism_invol_jud_def" = "Defendant Wins")

# Make nice names for the subsets
smap <- list("democrat" = "Democratic Appointees",
             "republican" = "Republican Appointees", 
             "partisan" = "All Appointees")

# Make nice names for courts
cmap <- list("cacd" = "C.D. Cal.", "caed" = "E.D. Cal.", "casd" = "S.D. Cal.",
               "cand" = "N.D. Cal.", "flmd" = "M.D. Fla.", "flsd" = "S.D. Fla.",
               "gand" = "N.D. Ga.", "ilnd" = "N.D. Ill.", "laed" = "E.D. La.",
               "lawd" = "W.D. La.", "njd" = "D.N.J.", "nyed" = "E.D.N.Y.",
               "nynd" = "N.D.N.Y.", "nysd" = "S.D.N.Y.", "ord" = "D. Or.",
               "paed" = "E.D. Pa.", "txnd" = "N.D. Tex.", "txsd" = "S.D. Tex.",
               "waed" = "E.D. Wash.", "wawd" = "W.D. Wash.")

# Load a function that creates the tables in the paper
source(paste0(wdir,"/code/chp_apsr_makeTable.R"))

################################################################################
# Import the effects data and do a little clean up
################################################################################

# Import the effects data
ef <- do.call(bind_rows, lapply(list.files(edir), function (x) read_csv(paste0(edir,"/",x), col_types = cols(.default = "c"))))
ncols <- colnames(ef)[1:7]
icols <- colnames(ef)[which(grepl("^[0-9]+$", ef[1,]))]
ccols <- colnames(ef)[which(grepl("[A-z]", ef[1,]))]
ef <- mutate_at(ef, ncols, as.numeric)
ef <- mutate_at(ef, icols, as.numeric)
ef$model_id <- str_pad(ef$model_id, 3, "left", "0")

# Make better labels

## Subsets
ef$pmask <- unlist(lapply(ef$mask, function(x) smap[[gsub("/.+$","",x)]]))
ef$pmask <- factor(ef$pmask, levels=sort(unique(ef$pmask)))

## Outcomes
ef$outcome <- unlist(lapply(ef$outcome, function(x) omap[[x]]))
ef$outcome <- factor(ef$outcome, levels=sort(unique(ef$outcome), decreasing = TRUE))

## Treatments
ef$treatment1 <- unlist(lapply(ef$treatment, function(x) tmap[[x]]))
ef$treatment1 <- factor(ef$treatment1, levels=unlist(tmap))

## Plaintiff subsets
ef$pla_shares <- NA
tmp <- unlist(lapply(seq(1,nrow(ef)), function(x) grepl(paste0("pla_", ef[x,"treatment"], "(_[wp])?[=]1"), ef[x,"mask"])))
ef$pla_shares[tmp] <- "Cases Where Plaintiff(s) Share\nIdentity of Treatment Judges"
tmp <- unlist(lapply(seq(1,nrow(ef)), function(x) grepl(paste0("pla_", ef[x,"treatment"], "(_[wp])?[=]0"), ef[x,"mask"])))
ef$pla_shares[tmp] <- "Cases Where Plaintiff(s) Don't Share\nIdentity of Treatment Judges"
ef$pla_shares <- factor(ef$pla_shares, levels = sort(unique(ef$pla_shares), decreasing=TRUE))
rm(tmp)

## Create a new variable to make plaintiff analysis plot for appendix
ef$pla_shares1 <- NA
ef$pla_shares1[ef$treatment == "woman" & grepl("(=[10])", ef$mask)] <- "gender"
ef$pla_shares1[ef$treatment != "woman" & grepl("(_w=[10])", ef$mask)] <- "wru"
ef$pla_shares1[ef$treatment != "woman" & grepl("(_p=[10])", ef$mask)] <- "predictrace"

mask <- !is.na(ef$pla_shares) & !is.na(ef$pla_shares1)
ef$pla_shares1[mask] <- paste0(ef$pla_shares[mask], "\n(", ef$pla_shares1[mask],"/gender)")
ef$pla_shares1[mask] <- gsub("(wru|gender)[/]gender","wru and/or gender",ef$pla_shares1[mask])
ef$pla_shares1[mask] <- gsub("predictrace[/]gender","predictrace and gender",ef$pla_shares1[mask])
ef$pla_shares1 <- factor(ef$pla_shares1, levels = rev(sort(unique(ef$pla_shares1))))

## Number of judges and observations
ef$obs <- scales::comma(ef$obs)
ef$njud <- paste0(ef$judges_treat,"/",ef$judges_control)

## Are there president controls
ef$pres_controls1 <- NA
ef$pres_controls1 <- ifelse(ef$pres_controls == 1, "President\nControls", ef$pres_controls1)
ef$pres_controls1 <- ifelse(ef$pres_controls == 0, "No President\nControls", ef$pres_controls1)
ef$pres_controls1 <- factor(ef$pres_controls1, levels = rev(sort(unique(ef$pres_controls1))))

# Create confidence intervals adjusted for multiple comparisons
df <- read_csv(paste0(wdir,"/data/chp_apsr_case_data.csv"), show_col_types = FALSE)
df <- df[which(!is.na(df$file) & !is.na(df$CASE_ID) & is.na(df$to_drop) & df$df_main==1),]
subgroups <- c("nonwhite", "black", "latino", "woman", "white_woman", "nonwhite_woman", "nonwhite_man")
df <- df[!is.na(df$jid),c("jid", "republican", "democrat", subgroups)]
for(v in colnames(df)[4:length(colnames(df))]){
  df[[paste0("republican_",v)]] <- df[[paste0("republican")]] * df[[v]]
  df[[paste0("democrat_",v)]] <- df[[paste0("democrat")]] * df[[v]]
}
cormat <- cor(df[,colnames(df)[which(grepl("(republican|democrat)_",colnames(df)))]])
ntests <- c(meff(cormat, method = "nyholt"), meff(cormat, method = "liji"), meff(cormat, method = "gao"), meff(cormat, method = "galwey"))
ntests <- min(ntests)
crit.p <- 0.05/ntests
crit.t <- qnorm(1-(crit.p/2))
rm(df, cormat, subgroups)

ef$ciL_mc <- NA
ef$ciH_mc <- NA
ef$ntests <- NA
mc.mask <- which(!grepl("(nontrad|repub)",ef$treatment) & !grepl("=", ef$mask))
ef$ciL_mc[mc.mask] <- ef$effect[mc.mask] - crit.t * ef$se[mc.mask]
ef$ciH_mc[mc.mask] <- ef$effect[mc.mask] + crit.t * ef$se[mc.mask]
ef$ntests[mc.mask] <- ntests
rm(ntests, crit.p, crit.t, mc.mask)

## Any confidence interval that is beyond the figure borders, we should truncate
ef$ciH[which(ef$ciH>0.1)] <- 0.1
ef$ciL[which(ef$ciL < -0.1)] <- -0.1

################################################################################
# Plots for the descriptive statistics in the paper (Figures 1-3)
################################################################################

# Figure 1

## Note the following dataset was created using the FJC's Biographical Directory
## of Article III Federal Judges. See https://www.fjc.gov/history/judges.
bf <- read_csv(paste0(wdir,"/data/chp_apsr_judge_races.csv"), show_col_types = FALSE)
bf$Category <- NA
bf$Category[(bf$Race=="White") & (bf$Gender=="Man")] <- "White Men"
bf$Category[(bf$Race!="White") & (bf$Gender=="Man")] <- "Men of Color"
bf$Category[(bf$Race=="White") & (bf$Gender=="Woman")] <- "White Women"
bf$Category[(bf$Race!="White") & (bf$Gender=="Woman")] <- "Women of Color"
bf$Category <- factor(bf$Category, levels=arrange(summarize(group_by(bf,Category), n = n_distinct(jid)),n)$Category)

bf$Party <- factor(paste0(bf$Party,"s"))

tp <- NULL
for(y in seq(1977,2022)){
  tmp <- bf[bf$status_begin <= paste0(y,"-01-01") & bf$status_end > paste0(y,"-01-01"), c('jid',"Party",'Category')]
  tmp <- mutate(summarize(group_by(tmp, Party, Category), n = n_distinct(jid)), Year = y)
  tp <- bind_rows(tp, tmp)
  rm(tmp)
}

z1 <- ggplot(tp) + 
  annotate("rect", fill = "black", alpha = 0.05, xmin = 1977, xmax = 1981, ymin = -Inf, ymax = Inf) + 
  annotate("rect", fill = "black", alpha = 0.05, xmin = 1993, xmax = 2001, ymin = -Inf, ymax = Inf) + 
  annotate("rect", fill = "black", alpha = 0.05, xmin = 2009, xmax = 2017, ymin = -Inf, ymax = Inf) + 
  geom_bar(aes(x = Year, y = n, fill = Category), position = "stack", stat = "identity") + 
  facet_grid(cols = vars(Party)) + 
  labs(y = "Article III Judges", x = "") + 
  scale_fill_grey(start = 0.7, end = 0) +
  scale_x_continuous(breaks = seq(1973, 2021, 4)) + 
  scale_y_continuous(breaks = seq(0, 800, 100)) + 
  theme_bw() +
  theme(axis.title.x = element_blank(), 
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        legend.title = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        plot.title = element_blank())

if(to.save){
  pdf(paste0(pdir,"/fg1.pdf"), width=6.5, height=2, family = "Times")
  print(z1)
  dev.off()
}
rm(z1, tp, bf)

# Figure 2

df <- read_csv(paste0(wdir,"/data/chp_apsr_case_data.csv"), show_col_types = FALSE)
df <- df[which(!is.na(df$file) & !is.na(df$CASE_ID) & is.na(df$to_drop)),]

df <- distinct(df[,c("OPEN_ID", "DISTRICT", "df_main", "df_scales", names(tmap))])

df$DISTRICT <- unlist(lapply(df$DISTRICT, function(x) ifelse(x %in% names(cmap), cmap[[x]], NA)))
df$DISTRICT <- factor(df$DISTRICT, levels=arrange(count(df[df$df_main==1,], DISTRICT), desc(n))$DISTRICT)

df$range <- NA
df$range <- ifelse(grepl("(ca[nsec]d|ord|wa[ew]d|ny[sn]d|la[we]d|txnd)", df$OPEN_ID), "1995-2016", df$range)
df$range <- ifelse(grepl("(ilnd|fl[sm]d|njd|nyed|paed|txsd|gand)", df$OPEN_ID), "1995-2020", df$range)
df$range <- ifelse(grepl("(txnd)", df$OPEN_ID), "1995-2014", df$range)
df$range <- ifelse(grepl("(ord)", df$OPEN_ID), "1997-2013", df$range)
df$range <- ifelse(grepl("(nynd)", df$OPEN_ID), "1995-2011", df$range)
df$range <- factor(df$range, levels = c("1995-2020", "1995-2016", "1995-2014", "1995-2011", "1997-2013"))

tp <- count(group_by(df[df$df_main==1,],range), DISTRICT)
tp$p <- paste0(round(tp$n/sum(tp$n),2)*100,"%")

z1 <- ggplot(tp) + 
  geom_bar(aes(x = DISTRICT, y = n, fill = range), stat = "identity") + 
  geom_text(aes(x = DISTRICT, y = n, label=p), vjust=-0.25, size = unitpt(8)) + 
  scale_y_continuous(limits = c(0,37000), breaks = seq(0,35000, 5000), label=comma) + 
  labs(y = "Number of Cases") + 
  theme_bw() +
  scale_fill_grey() + 
  guides(fill=guide_legend(title="Data Coverage Years")) + 
  theme(axis.title = element_text(size=8),
        axis.title.x = element_blank(), 
        axis.text = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        legend.title = element_text(size=8),
        legend.text = element_text(size=8),
        legend.key.size = unit(8, 'pt'),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.title = element_blank())

if(to.save){
  pdf(paste0(pdir,"/fg2.pdf"), width=6.5, height=1.75, family = "Times")
  print(z1)
  dev.off()
}
rm(z1, df, tp)

# Figure 3

tp <- read_csv(paste0(wdir,"/data/chp_apsr_case_data.csv"), show_col_types = FALSE)
tp <- tp[which(!is.na(tp$file) & !is.na(tp$CASE_ID) & is.na(tp$to_drop)),]
tp <- distinct(tp[,c("df_main", "df_scales", "jid", "fjc_race", "woman", "republican")])
tp$Race <- tp$fjc_race
tp$Gender <- ifelse(tp$woman==1,"Women",ifelse(tp$woman==0,"Men",NA))
tp$Party <- ifelse(tp$republican==1,"Republican Appointees",ifelse(tp$republican==0,"Democratic Appointees",NA))

tp <- bind_rows(mutate(tp[tp$df_main==1,], Subset = "Main Dataset"),
                mutate(tp[tp$df_scales==1,], Subset = "SCALES Dataset"))
tp <- distinct(tp[,c("Subset","jid","Party","Race","Gender")])
tp <- summarize(group_by(tp, Subset, Party, Race, Gender), n = n_distinct(jid))


z1 <- ggplot(tp[grepl("Main",tp$Subset),]) + 
  geom_bar(aes(x = Race, y = n, group = Gender, fill = Gender), position=position_dodge(0.9), stat="identity") + 
  geom_text(aes(x = Race, y = n, group = Gender, label = paste0(Gender," (", n,")")), position=position_dodge(0.9), hjust = -0.1, vjust=0.40, size = unitpt(7), angle = 90, lineheight = 0.7) +
  labs(y = "Number of Judges in Dataset", subtitle = "Main Dataset") + 
  scale_fill_manual(values = c("grey65", "grey40")) + 
  scale_y_continuous(limits = c(0,290), breaks = seq(0,290,25), labels = scales::comma) +
  theme_bw() +
  theme(axis.title = element_text(size=8),
        axis.title.x = element_blank(), 
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.text = element_text(size=8),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        legend.position = "none",
        plot.title = element_blank(), 
        plot.subtitle = element_blank(), 
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank()) + 
  facet_grid(cols = vars(Party))

# Figure A.2

z2 <- ggplot(tp[grepl("SCALES",tp$Subset),]) + 
  geom_bar(aes(x = Race, y = n, group = Gender, fill = Gender), position=position_dodge(0.9), stat="identity") + 
  geom_text(aes(x = Race, y = n, group = Gender, label = paste0(Gender," (", n,")")), position=position_dodge(0.9), hjust = -0.1, vjust=0.40, size = unitpt(7), angle = 90, lineheight = 0.7) +
  labs(y = "Number of Judges in Dataset", subtitle = "SCALES Dataset") +
  scale_fill_manual(values = c("grey65", "grey40")) + 
  scale_y_continuous(limits = c(0,474), breaks = seq(0,474,50), labels = scales::comma) +
  theme_bw() +
  theme(axis.title = element_text(size=8),
        axis.title.x = element_blank(), 
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.text = element_text(size=8),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        legend.position = "none",
        plot.title = element_blank(), #element_text(hjust = 0.5),
        plot.subtitle = element_blank(), #element_text(hjust = 0.5),
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank()) + 
  facet_grid(cols = vars(Party))

if(to.save){
  pdf(paste0(pdir,"/fg3.pdf"), width=4.5, height=2.3, family = "Times")
  print(z1)
  dev.off()
  
  pdf(paste0(pdir,"/fgA2.pdf"), width=4.5, height=2.3, family = "Times")
  print(z2)
  dev.off()
}
rm(z1, z2, df, tp)

# Figure A.3

## Load the ROC curve data
rf <- read_csv(paste0(rdir, "/roc_miscoded.csv"), show_col_types = FALSE)
rf <- rf[rf$algorithm == "my_ensemble",]
rf$count <- "Predicting Outcome Miscoding\nwith Judge Characteristics"

z1 <- ggplot(rf) + 
  geom_abline(slope = 1, intercept = 0, linewidth = 0.3, linetype = "dashed") + 
  geom_line(aes(x = fpr, y = tpr, color = model), stat = "identity") +
  labs(y = "True Positive Rate", x = "False Positive Rate\n ") +
  scale_x_continuous(expand = c(0.01,0.01)) + 
  scale_y_continuous(expand = c(0.01,0.01)) + 
  scale_color_manual(values = c("grey60", "black")) +
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(count))

if(to.save){
  pdf(paste0(pdir,"/fgA3.pdf"), width=2, height=2, family = "Times")
  print(z1)
  dev.off()
}

rm(z1, rf)

################################################################################
# Plots for the effects in the paper (Figures 4-8)
################################################################################

# Figure 4

fg4 <- (grepl("(Nontraditional|Republican)", ef$treatment1)) & (ef$dataset=="main") & (!grepl("[/]pla",ef$mask)) & (ef$pres_controls==0)

z1 <- ggplot(ef[which(fg4 & (grepl("(Nontraditional)", ef$treatment1))),]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") +
  geom_linerange(aes(x = outcome, ymax = ciH, ymin = ciL), linewidth = 2, position = position_dodge(0.5), color = est.color) +
  geom_point(aes(x = outcome, y = effect), size = 2, fill = "white", stroke = 1, position = position_dodge(0.5), color = est.color, shape = 21) +
  labs(y = paste0("ATE of Assigning Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Case Outcomes') + 
  scale_y_continuous(limits = c(-0.1,0.1), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  scale_x_discrete(expand = c(0.6,0)) +
  geom_text(aes(x = outcome, y = 0.1, label = obs), size = unitpt(8), position = position_dodge(0.5), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = outcome, y = -0.1, label = njud), size = unitpt(8), position = position_dodge(0.5), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        panel.grid.minor = element_blank(),
        axis.text.y = element_text(size=8), 
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask))

z2 <- ggplot(ef[which(fg4 & (grepl("(Republican)", ef$treatment1))),]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") +
  geom_linerange(aes(x = outcome, ymax = ciH, ymin = ciL), linewidth = 2, color = est.color) + #
  geom_point(aes(x = outcome, y = effect, shape = pres_controls1), size = 2, fill = "white", stroke = 1, color = est.color, shape = 21) +
  labs(y = paste0("ATEs of Assigning Republican Appointees\n(Control Group: Democratic Appointees)"), x = 'Case Outcomes') + 
  scale_y_continuous(limits =  c(-0.1,0.1), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  scale_x_discrete(expand = c(0.6,0)) +
  geom_text(aes(x = outcome, y = 0.1, label = obs), size = unitpt(8), position = position_dodge(0.5), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = outcome, y = -0.1, label = njud), size = unitpt(8), position = position_dodge(0.5), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        panel.grid.minor = element_blank(),
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask)) 

if(to.save){
  pdf(paste0(pdir,"/fg4.pdf"), width=5, height=3, family = "Times")
  aligned <- align_plots(z1, z2, align = "h")
  print(plot_grid(ggdraw(aligned[[1]]), ggdraw(aligned[[2]]),
                  rel_widths =  c(0.64,0.36),
                  nrow = 1, ncol = 2))
  dev.off()
  
  # Table E.1
  makeTable("fg4", ef[fg4,], tdir)
}
rm(z1, z2)

# Figure C.3

fgC3 <- (grepl("(Nontraditional)", ef$treatment1)) & (ef$dataset=="main") & (!grepl("[/]pla",ef$mask))

z1 <- ggplot(ef[which(fgC3),]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") +
  geom_linerange(aes(x = outcome, ymax = ciH, ymin = ciL, group = pres_controls1, color = pres_controls1), linewidth = 2, position = position_dodge(0.7)) + #
  geom_point(aes(x = outcome, y = effect, group = pres_controls1, shape = pres_controls1, color = pres_controls1), size = 2, fill = "white", stroke = 1, position = position_dodge(0.7)) +
  labs(y = paste0("ATEs of Assigning Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Case Outcomes') + 
  scale_shape_manual(name="X", values = 20 + seq_along(unique(ef$pres_controls1))) +
  scale_color_manual(name="X", values = c(est.color,est.color)) +
  scale_y_continuous(limits = c(-0.1,0.1), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  scale_x_discrete(expand = c(0.6,0)) +
  geom_text(aes(x = outcome, y = max(c(-0.1,0.1)), label = obs), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = outcome, y = min(c(-0.1,0.1)), label = njud), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  guides(shape = guide_legend(byrow = TRUE)) + 
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        legend.spacing.y = unit(10, 'pt'),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask))

if(to.save){
  pdf(paste0(pdir,"/fgC3.pdf"), width=5, height=2.75, family = "Times")
  print(z1)
  dev.off()
  
  # Table E.5
  makeTable("fgC3", ef[fgC3,], tdir)
}
rm(z1)

# Figure 5

fg5 <- (grepl("(Nontraditional)", ef$treatment1)) & (ef$dataset=="scales") & (!grepl("[/]pla",ef$mask)) & (ef$pres_controls==0)

z1 <- ggplot(ef[which(fg5),]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") +
  geom_linerange(aes(x = outcome, ymax = ciH, ymin = ciL), linewidth = 2, position = position_dodge(0.5), color = est.color) +
  geom_point(aes(x = outcome, y = effect), size = 2, fill = "white", stroke = 1, position = position_dodge(0.5), color = est.color, shape = 21) +
  labs(y = paste0("ATE of Assigning Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Case Outcomes') + 
  scale_y_continuous(limits = c(-0.1,0.1), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  scale_x_discrete(expand = c(0.6,0)) +
  geom_text(aes(x = outcome, y = 0.1, label = obs), size = unitpt(8), position = position_dodge(0.5), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = outcome, y = -0.1, label = njud), size = unitpt(8), position = position_dodge(0.5), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        panel.grid.minor = element_blank(),
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask))

if(to.save){
  pdf(paste0(pdir,"/fg5.pdf"), width=3.2, height=3, family = "Times")
  print(z1)
  dev.off()
  
  # Table E.2
  makeTable("fg5", ef[fg5,], tdir)
}
rm(z1)

# Figure 6

out <- "Settlements"
fg6 <- (!grepl("(Nontraditional|Republican)", ef$treatment1)) & (grepl(out, ef$outcome)) & (ef$dataset=="main") & (!grepl("[/]pla",ef$mask)) & (ef$pres_controls==0) 
fg6 <- fg6 & (!is.na(ef$effect))

z1 <- ggplot(ef[fg6,]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") + #
  geom_linerange(aes(x = treatment1, ymax = ciH, ymin = ciL), linewidth = 2, position = position_dodge(0.5), color = est.color) + #
  geom_linerange(aes(x = treatment1, ymax = ciH_mc, ymin = ciL_mc), linewidth = 1, position = position_dodge(0.5), color = est.color) + #
  geom_point(aes(x = treatment1, y = effect, shape = treatment1), size = 2, fill = "white", stroke = 1, position = position_dodge(0.5), color = est.color, shape = 21) +
  labs(y = paste0("ATEs on ", out, " of Assigning Specific\nTypes of Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Judge Type Subsets') + 
  scale_y_continuous(limits = c(-0.105,0.105), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  scale_x_discrete(expand = c(0.15,0)) +
  geom_text(aes(x = treatment1, y = 0.105, label = obs), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = treatment1, y = -0.105, label = njud), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        panel.grid.minor.y = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask), scales = "free")

if(to.save){
  pdf(paste0(pdir,"/fg6.pdf"), width=5, height=3, family = "Times")
  print(z1)
  dev.off()
  
  # Table E.3
  makeTable("fg6", ef[fg6,], tdir)
}
rm(z1)

# Figure C.1

out <- "Defendant Wins"
fgC1 <- (!grepl("(Nontraditional|Republican)", ef$treatment1)) & (grepl(out, ef$outcome)) & (ef$dataset=="main") & (!grepl("[/]pla",ef$mask)) & (ef$pres_controls==0) 
fgC1 <- fgC1 & (!is.na(ef$effect))

z1 <- ggplot(ef[fgC1,]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") + #
  geom_linerange(aes(x = treatment1, ymax = ciH, ymin = ciL), linewidth = 2, position = position_dodge(0.5), color = est.color) + #
  geom_linerange(aes(x = treatment1, ymax = ciH_mc, ymin = ciL_mc), linewidth = 1, position = position_dodge(0.5), color = est.color) + #
  geom_point(aes(x = treatment1, y = effect, shape = treatment1), size = 2, fill = "white", stroke = 1, position = position_dodge(0.5), color = est.color, shape = 21) +
  labs(y = paste0("ATEs on ", out, " of Assigning Specific\nTypes of Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Judge Type Subsets') + 
  scale_y_continuous(limits = c(-0.105,0.105), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  scale_x_discrete(expand = c(0.15,0)) +
  geom_text(aes(x = treatment1, y = 0.105, label = obs), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = treatment1, y = -0.105, label = njud), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        panel.grid.minor.y = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask), scales = "free")

if(to.save){
  pdf(paste0(pdir,"/fgC1.pdf"), width=5, height=3, family = "Times")
  print(z1)
  dev.off()
  
  # Table E.6
  makeTable("fgC1", ef[fgC1,], tdir)
}
rm(z1)

# Figure 7

out <- "Settlements"
fg7 <- (grepl("^(Judges of Color|Women)$", ef$treatment1)) & (grepl(out, ef$outcome)) & (ef$dataset=="main") & (grepl("[/]pla",ef$mask)) & (!grepl("p=",ef$mask)) & (ef$pres_controls==0) 

z1 <- ggplot(ef[fg7,]) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") + #
  geom_linerange(aes(x = treatment1, ymax = ciH, ymin = ciL, group = pla_shares), linewidth = 2, position = position_dodge(0.5), color = est.color) + #
  geom_point(aes(x = treatment1, y = effect, group = pla_shares, shape = pla_shares), size = 2, fill = "white", stroke = 1, position = position_dodge(0.5), color = est.color) +
  labs(y = paste0("ATEs on Settlements of Assigning Specific\nTypes of Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Judge Type Subsets') + 
  scale_shape_manual(name="", values = 21 + seq_along(unique(ef$pla_shares[fg7]))) +
  scale_y_continuous(limits = c(-0.1,0.1), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  geom_text(aes(x = treatment1, y = 0.1, label = obs, group = pla_shares), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = treatment1, y = -0.1, label = njud, group = pla_shares), size = unitpt(8), position = position_dodge(0.6), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  guides(shape = guide_legend(byrow = TRUE)) + 
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.spacing.y = unit(5, 'pt'),
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask))

if(to.save){
  pdf(paste0(pdir,"/fg7.pdf"), width=6.25, height=3, family = "Times")
  print(z1)
  dev.off()
  
  # Table E.4
  makeTable("fg7", ef[fg7,], tdir)
}
rm(z1, out)

# Figure C.2

fgC2 <- grepl("(Settlements)", ef$outcome) & !grepl("All", ef$pmask) & (ef$pres_controls == 0)
fgC2 <- fgC2 & grepl(paste0("(_(p|w)=|woman=)"), ef$mask) & (!is.na(ef$effect))

z1 <- ggplot(distinct(ef[fgC2,colnames(ef)[colnames(ef)!='model_id']])) + 
  geom_hline(yintercept = 0, linewidth = 0.25, linetype = "dashed") + #
  geom_linerange(aes(x = treatment1, ymax = ciH, ymin = ciL, group = pla_shares1, color = pla_shares1), linewidth = 2, position = position_dodge(0.75)) + #
  geom_point(aes(x = treatment1, y = effect, group = pla_shares1, color = pla_shares1, shape = pla_shares1), size = 2, fill = "white", stroke = 1, position = position_dodge(0.75)) +
  labs(y = paste0("ATEs on Settlements of Assigning Specific\nTypes of Nontraditional Appointees\n(Control Group: Traditional Appointees)"), x = 'Judge Type Subsets') + 
  scale_shape_manual(name="X", values = c(21,21,22,22)) +
  scale_color_manual(name="X", values = c("#7b3294", "#c2a5cf", "#008837", "#a6dba0")) +
  scale_y_continuous(limits = c(-0.1,0.1), breaks = round(seq(-0.1,0.1, 0.02),2)) +
  geom_text(aes(x = treatment1, y = 0.1, label = obs, group = pla_shares1), size = unitpt(6), position = position_dodge(0.75), angle = 90, hjust = 1, vjust = 0.5) +
  geom_text(aes(x = treatment1, y = -0.1, label = njud, group = pla_shares1), size = unitpt(6), position = position_dodge(0.75), angle = 90, hjust = 0, vjust = 0.5) +
  theme_bw() +
  guides(shape = guide_legend(byrow = TRUE)) + 
  theme(title = element_text(size=9),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        legend.title = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.spacing.y = unit(0, 'pt'),
        legend.text = element_text(size=7,margin = margin(t = 2, b = 2)), # 
        axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.title.x = element_blank(),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(pmask), scales = "free") #, rows = vars(outcome)

if(to.save){
  pdf(paste0(pdir,"/fgC2.pdf"), width=6.5, height=3, family = "Times")
  print(z1)
  dev.off()
  
  # Table E.7
  makeTable("fgC2", distinct(ef[fgC2,colnames(ef)[colnames(ef)!='model_id']]), tdir)
}
rm(z1)

################################################################################
# Plots for descriptive statistics in appendix
################################################################################

# Figure A.1

# How many cases dropped due to judge coding
df <- read_csv(paste0(wdir,"/data/chp_apsr_case_data.csv"), show_col_types = FALSE)
df <- df[which(!is.na(df$file) & !is.na(df$CASE_ID)),]
df <- df[, c("OPEN_ID", "assigned_judge", "entry_judge")]

df$cat <- NA
df$cat[!is.na(df$assigned_judge) & is.na(df$entry_judge)] <- "Assigned Judge Listed but\nNo First Judge Listed"
df$cat[is.na(df$assigned_judge) & !is.na(df$entry_judge)] <- "First Judge Listed but\nNo Assigned Judge Listed"
df$cat[!is.na(df$assigned_judge) & !is.na(df$entry_judge) & df$assigned_judge==df$entry_judge] <- "Same Assigned\nand First Judge"
df$cat[!is.na(df$assigned_judge) & !is.na(df$entry_judge) & df$assigned_judge!=df$entry_judge] <- "Different Assigned\nand First Judges"
df$cat[is.na(df$assigned_judge) & is.na(df$entry_judge)] <- "No Assigned or\nFirst Judge Listed"

df$cat <- factor(df$cat, arrange(count(df,cat), desc(n))$cat)

tp <- count(df, cat)
tp$p <- paste0("(", round(tp$n/sum(tp$n),2)*100,"%)")

z1 <- ggplot(tp) + 
  geom_bar(aes(x = cat, y = n), stat="identity") + 
  geom_text(aes(x = cat, y = n, label=scales::comma(n)), vjust=-2.00, size = unitpt(7)) + 
  geom_text(aes(x = cat, y = n, label=p), vjust=-0.50, size = unitpt(7)) + 
  labs(y = "Number of Cases", x = 'Data Cleaning Steps') +
  scale_y_continuous(limits = c(0,390000), breaks = seq(0,390000,50000), labels = scales::comma) +
  theme_bw() +
  theme(axis.title = element_text(size=8),
        axis.title.x = element_blank(), 
        axis.text = element_text(size=8),
        axis.text.x = element_text(size=7, lineheight = 0.7, angle = 35, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        legend.title = element_text(size=8),
        legend.text = element_text(size=8),
        legend.key.size = unit(8, 'pt'),
        panel.grid.minor = element_blank(),
        plot.title = element_blank())

if(to.save){
  pdf(paste0(pdir,"/fgA1-2.pdf"), width=2.5, height=2.2, family = "Times")
  print(z1)
  dev.off()
}

# Table A.1

tp <- read_csv(paste0(wdir,"/outs/chp_apsr_wru_distribution.csv"), show_col_types = FALSE)

fn <- paste0(tdir,"/tabA1.tex")
write_lines("\\begin{table}[ht]", fn, append = FALSE)
write_lines("\\centering\\footnotesize", fn, append = TRUE)
write_lines("\\caption{We summarize the distribution of predicted probabilities for the plaintiffs' race classifications. Note: the ``None'' category includes plaintiffs for whom the \\texttt{wru} algorithm returned predicted probabilities less than 0.5 for every racial/ethnic category.}", fn, append = TRUE)
write_lines("\\label{tab:app_race_pred}", fn, append = TRUE)
write_lines("\\begin{tabular}{|l|c|c|c|c|c|}", fn, append = TRUE)
write_lines("\\hline", fn, append = TRUE)
write_lines("Classified & Mean & Mean & Mean & Mean & Mean \\\\", fn, append = TRUE)
write_lines("Plaintiff Race & Pr(White) & Pr(Black) & Pr(Hispanic) & Pr(Asian) & Pr(Other)\\\\\\hline", fn, append = TRUE)
for(i in 1:nrow(tp)){
  row <- paste0(paste0(tp[i,],collapse=" & ")," \\\\\\hline")
  write_lines(row, fn, append = TRUE)  
}
write_lines("\\end{tabular}", fn, append = TRUE)
write_lines("\\end{table}", fn, append = TRUE)
rm(tp)

# Table A.2

fn <- paste0(tdir,"/tabA2.tex")
write_lines("\\begin{table}[ht]", fn, append = FALSE)
write_lines("\\centering", fn, append = TRUE)
write_lines("\\caption{The percentage of suits belonging to each civil rights category based on a random sample of 100 original complaints. Percentages add up to greater than 100\\% because some of the cases involve more than one type of alleged civil rights violation.}\\label{tab:civil_rights_case_type}", fn, append = TRUE)
write_lines("\\begin{tabular}{c|c|c|c|c|c|c}", fn, append = TRUE)
write_lines("ADA Access & Employment & Race & Gender & Age & Disability & Police \\\\\\hline", fn, append = TRUE)
write_lines("15\\% & 40\\% & 18\\% & 16\\% & 11\\% & 23\\% & 20\\% \\\\", fn, append = TRUE)
write_lines("\\end{tabular}", fn, append = TRUE)
write_lines("\\end{table}", fn, append = TRUE)

################################################################################
# Non-random assignment of cases to Chief Judge Preska 
################################################################################

# Figure B.1

# Load the case data and subset to NYSD only
df <- read_csv(paste0(wdir,"/data/chp_apsr_case_data.csv"), show_col_types = FALSE)
df <- df[which(!is.na(df$file) & !is.na(df$CASE_ID)),]
df <- df[is.na(df$to_drop) | (grepl("^L*P*V*$", df$to_drop)),]
df <- df[grepl("nysd", df$OPEN_ID),]
df$jid <- as.character(df$jid)

# Add outcome counts
df$dism_invol <- ifelse(grepl("dism_invol", df$OUTCOME_IDB), 1, 0)
df$jud_def <- ifelse(grepl("jud_(def)", df$OUTCOME_IDB), 1, 0)

# Create dataset of the number of cases heard by each NYSD judge each year
tp <- mutate(summarize(group_by(df, jid, YEAR), n = n()))
tp <- left_join(tp, mutate(summarize(group_by(df, jid, YEAR), jud_def = sum(jud_def))))

tp1 <- mutate(summarize(group_by(df[df$chief==1,], YEAR), n = n()), jid = "CHIEF")
tp1 <- left_join(tp1, mutate(summarize(group_by(df[df$chief==1,], YEAR), jud_def = sum(jud_def))))

tp <- bind_rows(tp[, c("YEAR", "jid", "jud_def", "n")],
                tp1[, c("YEAR", "jid", "jud_def", "n")])
rm(tp1)

tp2 <- bind_rows(mutate(rename(tp[,c("YEAR", "jid", "jud_def")], n = jud_def), count = "Judgments for Defendant"), 
                 mutate(tp[,c("YEAR", "jid", "n")], count = "All Cases Heard"))
tp2$count <- as.factor(tp2$count)

tp0 <- ungroup(left_join(mutate(summarize(group_by(df, jid, YEAR), n = n())), 
                        mutate(summarize(group_by(df, jid, YEAR), jud_def = sum(jud_def))),
                        by = join_by(jid, YEAR)))

tp1 <- ungroup(left_join(mutate(summarize(group_by(df[df$chief==1,], YEAR), n = n()), jid = "CHIEF"), 
                         mutate(summarize(group_by(df[df$chief==1,], YEAR), jud_def = sum(jud_def))),
                         by = join_by(YEAR)))

tp <- bind_rows(tp0[, c("YEAR", "jid", "jud_def", "n")],
                tp1[, c("YEAR", "jid", "jud_def", "n")])

rm(tp0,tp1)

tp <- bind_rows(mutate(rename(tp[,c("YEAR", "jid", "jud_def")], n = jud_def), count = "Judgments for Defendant"),
                mutate(tp[,c("YEAR", "jid", "n")], count = "All Cases Heard"))
tp$count <- as.factor(tp$count)

# Load the ROC curve data
rf <- read_csv(paste0(rdir, "/roc_nontraditional_preska.csv"), show_col_types = FALSE)
rf <- rf[rf$algorithm == "my_ensemble",]
rf$count <- "ROC Randomization Test"

# Make the plots
z1 <- ggplot(tp2) + 
  annotate("rect", fill = "black", alpha = 0.1, xmin = 2009.5, xmax = 2016.5, ymin = -Inf, ymax = Inf) + 
  stat_summary(aes(x = YEAR, y = n), fun=mean, geom="line", linewidth = 2, color = "gray20") + 
  geom_point(aes(x = YEAR, y = n), data = tp2[tp2$jid != "CHIEF",], size = 1, color = "gray70", alpha = 0.2) + 
  geom_point(aes(x = YEAR, y = n), data = tp2[tp2$jid == "R1689",], size = 2, color = "black") + 
  geom_point(aes(x = YEAR, y = n), data = tp2[tp2$jid == "CHIEF",], shape = 2, color = "black", size = 3, stroke = 1, alpha = 0.7) + 
  scale_x_continuous(breaks = seq(1995, 2016, 2)) + 
  scale_y_continuous(breaks = seq(0, 150, 50), limits = c(0,150)) + 
  labs(y = "Number of Cases\nHeard by NYSD Judges", x = "Year") + 
  theme_bw() + 
  theme(axis.text.x = element_text(size=8, angle = 45, hjust = 1, vjust = 1),
        axis.text.y = element_text(size=8),
        strip.background = element_blank(),
        axis.title = element_text(size = 9),
        axis.title.x = element_blank()) + 
  facet_grid(cols = vars(count))

## ROC Curve
z2 <- ggplot(rf) + 
  geom_abline(slope = 1, intercept = 0, linewidth = 0.3, linetype = "dashed") + 
  geom_line(aes(x = fpr, y = tpr, color = model), stat = "identity") +
  labs(y = "True Positive Rate", x = "False Positive Rate\n ") +
  scale_x_continuous(expand = c(0.01,0.01)) + 
  scale_y_continuous(expand = c(0.01,0.01)) + 
  scale_color_manual(values = c("grey60", "black")) +
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(count))

if(to.save){
  pdf(paste0(pdir,"/fgB1.pdf"), width=6, height=1.75, family = "Times")
  print(plot_grid(ggdraw(z1), NULL, ggdraw(z2),
                  rel_widths =  c(2.5,0.1,1),
                  nrow = 1, ncol = 3))
  dev.off()
}

################################################################################
# Randomization tests
################################################################################

# Figure B.2

# Load the ROC curve data
rocs <- list.files(rdir)
rocs <- rocs[which(grepl("roc",rocs) & !grepl("(miscoded|preska)",rocs))]

rf <- NULL
pf <- NULL
for(f in rocs){
  rf1 <- read_csv(paste0(rdir,"/", f), show_col_types = FALSE)
  rf1 <- rf1[rf1$algorithm == "my_ensemble",]
  
  rf1$mask <- str_split(f,"[_.]")[[1]][3]
  rf1$mask <- ifelse(rf1$mask == "democrat", "democratic", rf1$mask)
  rf1$mask <- paste0("Cases Assigned to\n",str_to_title(rf1$mask)," Appointees")
  rf1$mask <- rf1$mask
  
  rf1$scales <- ifelse(grepl("scales",f), "SCALES Dataset", "Main Dataset")
  
  rf1$model <- ifelse(rf1$model=="benchmark", "benchmark", "saturated")
  
  rf1 <- rf1[,c("fpr","tpr","model","mask","scales")]
  rf <- bind_rows(rf,rf1)
  
  # load predicted probabilities for making the EQQ plot
  pf1 <- read_csv(paste0(rdir,"/", str_replace(f,"roc","preds")), show_col_types = FALSE)
  pf1 <- pf1[pf1$algorithm == "my_ensemble", c("OPEN_ID","prediction","model")]
  pf1 <- pivot_wider(pf1, id_cols = OPEN_ID, names_from = model, values_from = prediction)
  pf1$q_benchmark <- round(ecdf(pf1$benchmark)(pf1$benchmark), 2)
  pf1$q_saturated <- round(ecdf(pf1$saturated)(pf1$saturated), 2)
  pf1 <- left_join(rename(summarize(group_by(pf1, q_benchmark), mean_pred_bench = mean(benchmark)), q = q_benchmark),
                   rename(summarize(group_by(pf1, q_saturated), mean_pred_satur = mean(benchmark)), q = q_saturated))
  pf1$mask <- rf1$mask[1]
  pf1$scales <- rf1$scales[1]
  pf <- bind_rows(pf,pf1)
}
rf$model <- factor(rf$model)
rf$mask <- factor(rf$mask)
rf$scales <- factor(rf$scales)
pf$mask <- factor(pf$mask)
pf$scales <- factor(pf$scales)

rmask <- grepl("Main", rf$scales)
z1 <- ggplot(rf[rmask,]) + 
  geom_abline(slope = 1, intercept = 0, linewidth = 0.3, linetype = "dashed") + 
  geom_line(aes(x = fpr, y = tpr, color = model), stat = "identity") +
  labs(y = "True Positive Rate", x = "False Positive Rate", subtitle = rf$scales[rmask][1]) +
  scale_x_continuous(expand = c(0.01,0.01)) + 
  scale_y_continuous(expand = c(0.01,0.01)) + 
  scale_color_manual(values = c("grey60", "black")) +
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(mask))

pmask <- grepl("Main", pf$scales)
z2 <- ggplot(pf[pmask,]) +  
  geom_abline(intercept = 0, slope = 1, linetype = 'solid', color = 'gray', size = 0.2) + 
  geom_point(aes(x = mean_pred_bench, y = mean_pred_satur), size = 1, alpha = 0.5) + 
  labs(y = "Propensity Scores\nfrom Saturated Model", 
       x = "Propensity Scores\nfrom Benchmark Model", subtitle = "  ") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(mask))

rmask <- grepl("SCALES", rf$scales)
z3 <- ggplot(rf[rmask,]) + 
  geom_abline(slope = 1, intercept = 0, linewidth = 0.3, linetype = "dashed") + 
  geom_line(aes(x = fpr, y = tpr, color = model), stat = "identity") +
  labs(y = "True Positive Rate", x = "False Positive Rate", subtitle = rf$scales[rmask][1]) +
  scale_x_continuous(expand = c(0.01,0.01)) + 
  scale_y_continuous(expand = c(0.01,0.01)) + 
  scale_color_manual(values = c("grey60", "black")) +
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(mask))

pmask <- grepl("SCALES", pf$scales)
z4 <- ggplot(pf[pmask,]) +  
  geom_abline(intercept = 0, slope = 1, linetype = 'solid', color = 'gray', size = 0.2) + 
  geom_point(aes(x = mean_pred_bench, y = mean_pred_satur), size = 1, alpha = 0.5) + 
  labs(y = "Propensity Scores\nfrom Saturated Model", 
       x = "Propensity Scores\nfrom Benchmark Model", subtitle = "  ") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 9),
        plot.title = element_text(hjust = 0.5),
        strip.background = element_blank()) + 
  facet_grid(cols = vars(mask))

if(to.save){
  pdf(paste0(pdir,"/fgB2.pdf"), width=5.9, height=3.5, family = "Times")
  print(plot_grid(ggdraw(z1), NULL, ggdraw(z2), ggdraw(z3), NULL, ggdraw(z4),
                  rel_widths =  c(0.98,0.05,1.02),
                  nrow = 2, ncol = 3, labels=c("A","","","B","","")))
  dev.off()
}